library(sjmisc)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tibble::add_case()  masks sjmisc::add_case()
## x dplyr::filter()     masks stats::filter()
## x purrr::is_empty()   masks sjmisc::is_empty()
## x dplyr::lag()        masks stats::lag()
## x tidyr::replace_na() masks sjmisc::replace_na()
# Remote location of the data-wrangling CSV.
f <- "https://raw.githubusercontent.com/difiore/ada-2022-datasets/main/data-wrangling.csv"
# Load the file into a tibble, taking the column names from its header row.
d <- f %>% read_csv(col_names = TRUE)
## Rows: 213 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): Scientific_Name, Family, Genus, Species, Leaves, Fauna
## dbl (17): Brain_Size_Species_Mean, Body_mass_male_mean, Body_mass_female_mea...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the first rows and the column summary of the tibble.
print(d)
## # A tibble: 213 × 23
##    Scientific_Name   Family   Genus   Species Brain_Size_Speci… Body_mass_male_…
##    <chr>             <chr>    <chr>   <chr>               <dbl>            <dbl>
##  1 Allenopithecus_n… Cercopi… Alleno… nigrov…              58.0             6130
##  2 Allocebus_tricho… Cercopi… Alloce… tricho…              NA                 92
##  3 Alouatta_belzebul Atelidae Alouat… belzeb…              52.8             7270
##  4 Alouatta_caraya   Atelidae Alouat… caraya               52.6             6525
##  5 Alouatta_guariba  Atelidae Alouat… guariba              51.7             5800
##  6 Alouatta_palliata Atelidae Alouat… pallia…              49.9             7150
##  7 Alouatta_pigra    Atelidae Alouat… pigra                51.1            11400
##  8 Alouatta_senicul… Atelidae Alouat… senicu…              55.2             6690
##  9 Aotus_azarai      Cebidae  Aotus   azarai               20.7             1180
## 10 Aotus_brumbacki   Cebidae  Aotus   brumba…              NA                 NA
## # … with 203 more rows, and 17 more variables: Body_mass_female_mean <dbl>,
## #   MeanGroupSize <dbl>, AdultMales <dbl>, AdultFemale <dbl>,
## #   GR_MidRangeLat_dd <dbl>, Precip_Mean_mm <dbl>, Temp_Mean_degC <dbl>,
## #   HomeRange_km2 <dbl>, DayLength_km <dbl>, Fruit <dbl>, Leaves <chr>,
## #   Fauna <chr>, Canine_Dimorphism <dbl>, Feed <dbl>, Move <dbl>, Rest <dbl>,
## #   Social <dbl>

Creating a variable named “bsd” (body size dimorphism) [ratio of male to female body mass]

# Body size dimorphism: mean male body mass divided by mean female body mass.
d[["bsd"]] <- d[["Body_mass_male_mean"]] / d[["Body_mass_female_mean"]]
# Show the newly added column.
d[["bsd"]]
##   [1] 1.9276730 1.0952381 1.3170290 1.5389151 1.2747253 1.3364486 1.7729393
##   [8] 1.2840691 0.9593496        NA 1.0512498 1.0179487 0.8413462 1.1190400
##  [15] 1.0143266 1.0196078 0.9252275 0.9705240 1.0672154 1.0520446 0.8462127
##  [22] 1.0910633 1.1908302 1.1191339 1.1979167 1.1660517 1.0902090 1.0582524
##  [29] 1.0669456 0.9202899 1.0578512 1.0662393 0.9166667        NA        NA
##  [36] 0.9473684 0.9875776        NA 1.1029836 0.9016393 1.3886463 1.3590454
##  [43] 1.3485738 1.2262940 1.2527346 1.6784452 1.8269962 1.7218182 1.7741935
##  [50] 1.6372275 1.5607032 1.6666667 1.4895833 1.3333333 1.2413793 1.6339286
##  [57] 1.7304348 1.6401469 1.7215514 1.7796610 1.5657277 1.4735111 1.4689655
##  [64]        NA 1.7576531 1.3623693 1.2099448 1.0071942 1.0472973 1.4478447
##  [71] 1.4182001 1.3307027 1.2787318 1.2887711 1.1927711 1.4016173 1.1662050
##  [78] 1.0522088 1.9076923 1.0477861 1.0093897        NA 1.1851852 0.9642995
##  [85] 0.9670782 0.8921576 1.0206186 0.9559471 1.0218579 1.0996169 0.9764151
##  [92] 1.0163043 1.0654255 1.0424077 1.0875912 1.6666667 2.3832168        NA
##  [99] 1.1164179 0.9816147 1.6538462 1.0103093 0.9577703 1.0795964 1.0528000
## [106] 0.9371080 1.0110294 0.8523392 1.0370370 1.0000000 0.8714286        NA
## [113] 1.0367893 0.8851571 0.9935760 1.0387205        NA        NA 1.0037360
## [120] 1.3128120 1.3611111 0.9814126 0.9948187 1.4523810 1.6376812 1.2145749
## [127] 1.4168112 1.3661270 1.3690764 1.3958381 1.6978682 1.8080439 1.6153846
## [134] 1.6371865 1.4590164 1.7750000 1.5096416 1.6000000 1.4000000 2.6875000
## [141] 0.9375000 1.0333333 1.0238095 1.2435897 0.9967949 1.9930727 1.0223097
## [148] 1.0122951        NA 1.0784314 1.0846645 1.2287234 1.0720721 1.0817439
## [155] 1.3554217 1.2279106 1.7293233 1.8583333 1.7475728 2.0135135 0.9241848
## [162] 0.9371429 1.0182704 1.0622711        NA        NA 1.3171122 1.0869565
## [169] 1.2278481 2.0531375 2.1689338 0.9955291 0.9981269 1.1190476 0.9848283
## [176] 0.9440454 0.9742351 0.9277014 0.9442897 0.9832215 1.3033175        NA
## [183]        NA 1.5431034 0.9953488 1.0201715 0.9343189 0.9978947 0.9461967
## [190] 0.9052453 1.1697026 1.0270270 0.9359694        NA 1.3600000 1.3191176
## [197] 1.1515152 1.6000000 1.1092437 1.0406504        NA 1.1452991 1.6239316
## [204]        NA 1.1103025 1.1368421 1.0714286 1.0860484 1.0815650 1.0799824
## [211] 1.4444444 1.0778443 1.0312500

creating variable named “sex_ratio” [ratio of adult females to adult males]

# Sex ratio: number of adult females divided by number of adult males.
d$sex_ratio <- with(d, AdultFemale / AdultMales)
# Show the newly added column.
pull(d, sex_ratio)
##   [1]         NA  1.0000000  1.0000000  1.4347826  1.6058394  2.1724138
##   [7]  1.1298701  1.2941176  1.0000000  1.0000000         NA  1.0000000
##  [13]  1.0000000  1.0000000  1.0000000         NA  2.0588235         NA
##  [19]         NA  0.5750000  1.0000000  1.0000000  1.0142857         NA
##  [25]  1.0000000         NA         NA         NA  1.0000000  1.0000000
##  [31]  1.0000000  2.0000000         NA  1.6666667  0.5000000  1.1785714
##  [37]  1.2608696  0.5000000  0.6666667  0.6666667  1.7105263  1.3611111
##  [43]  1.4705882  1.9714286         NA  3.7500000  4.7500000         NA
##  [49]  2.7777778         NA  8.5000000  4.0000000  3.0000000  7.0000000
##  [55]         NA         NA         NA  8.8000000  1.9000000  1.0000000
##  [61]  5.0000000  4.8879310  5.0000000         NA         NA         NA
##  [67]  1.0000000  1.0000000  6.0000000  1.8780488         NA         NA
##  [73]  2.0000000  1.9333333  1.0000000  2.6086957         NA  1.0000000
##  [79]  6.9333333  0.8571429         NA  0.8333333  0.9079903  1.1666667
##  [85]  0.9117647  1.0000000  1.0000000  0.6341463  0.9466667         NA
##  [91]         NA  1.0000000  1.0000000         NA         NA         NA
##  [97]  2.4444444         NA  1.0000000         NA  1.0000000  1.0000000
## [103]  1.1111111  1.0000000         NA         NA  1.0000000  1.0000000
## [109]  1.1232877  1.2307692  0.6250000         NA  0.8333333  1.0000000
## [115]         NA  1.0000000         NA  1.0000000  1.0000000  1.8139535
## [121]         NA         NA  1.0000000         NA         NA         NA
## [127]  2.2553191  5.1470588         NA  5.2888889  6.2083333  3.4827586
## [133]         NA  1.2125000  2.2903226  2.2962963  1.4893617  2.4444444
## [139]         NA         NA  1.0000000  1.0000000  1.0000000  2.0000000
## [145]  1.0000000  3.7000000         NA         NA         NA         NA
## [151]  1.0000000  1.0000000         NA         NA  1.5571429  2.8965517
## [157]  2.4305556  2.7714286  8.2000000  2.0746269  1.0000000  1.0000000
## [163]  1.9148936  5.1730769         NA  5.9606299  2.8571429         NA
## [169]  2.0000000         NA  1.0000000  1.1666667  6.0000000  1.6500000
## [175]         NA         NA  1.2500000  0.9863014         NA  1.6470588
## [181]  2.8461538         NA         NA         NA         NA  0.6842105
## [187]  0.8750000  1.0000000         NA         NA  0.9090909         NA
## [193]  0.6296296  0.6000000         NA  0.8900000  1.8372093  6.3888889
## [199]  1.0000000         NA         NA         NA  2.7333333  6.1666667
## [205] 15.6000000  3.1764706  2.3846154  2.8000000         NA  3.3809524
## [211]  3.8000000         NA  0.5680000

creating variable named “DI” [ratio of day range length to diameter of home range]

# DI: day range length divided by the diameter of the home range.
# HomeRange_km2 is an area, so it cannot be used as the denominator directly;
# the diameter is derived by treating the home range as a circle
# (area = pi * r^2, hence diameter = 2 * sqrt(area / pi)).
# NOTE(review): the circular-home-range assumption should be confirmed, and any
# DI values printed with the previous area-based formula need regenerating.
d$DI <- d$DayLength_km / (2 * sqrt(d$HomeRange_km2 / pi))
d$DI
##   [1]           NA           NA           NA           NA           NA
##   [6]   1.68421053           NA   5.50000000           NA           NA
##  [11]           NA           NA   6.74285714  33.00000000           NA
##  [16]           NA   1.00877193           NA   2.32876712   1.76470588
##  [21]  46.00000000           NA   0.86538462   3.05084746   0.71428571
##  [26]           NA           NA           NA   7.85714286  14.68085106
##  [31]   2.75862069   6.25000000           NA           NA           NA
##  [36]  11.20000000  44.00000000           NA 100.00000000 145.00000000
##  [41]   3.39622642   2.38636364   5.71428571   1.35294118           NA
##  [46]           NA   2.35294118           NA           NA           NA
##  [51]   9.37500000           NA           NA   1.28000000           NA
##  [56]           NA           NA  10.00000000           NA   7.00000000
##  [61]   1.30434783           NA   1.52173913           NA           NA
##  [66]           NA           NA           NA   1.11111111           NA
##  [71]           NA           NA   0.26495957   1.60000000   1.14000000
##  [76]   0.22316176           NA   8.28651685   0.11331445   7.55769231
##  [81]           NA   3.57142857   7.41463415   2.75925926           NA
##  [86]  16.95238095   2.22000000   1.45119863  10.82857143           NA
##  [91]           NA           NA           NA           NA 176.00000000
##  [96]           NA   0.21339950   1.90476190  43.00000000           NA
## [101]           NA   4.48275862   4.68750000   5.00000000  11.66666667
## [106]   3.46153846   5.65217391   2.08333333   0.39007092  18.00000000
## [111]           NA           NA   5.53846154           NA           NA
## [116]           NA           NA           NA           NA   0.60606061
## [121]           NA           NA           NA           NA           NA
## [126]           NA   3.26086957   0.65034965           NA   0.27072758
## [131]   1.02857143           NA           NA   1.98863636   1.85227273
## [136]           NA   0.63269962           NA           NA   0.11389522
## [141]           NA           NA           NA   3.00000000           NA
## [146]   0.17426273   1.49425287           NA           NA           NA
## [151]           NA           NA           NA           NA   0.08592911
## [156]   0.35023041   0.95238095   0.39464883   0.63327576   0.61403509
## [161]           NA           NA   1.30952381   3.20245399           NA
## [166]   4.70796460   0.99846154           NA  11.71875000           NA
## [171]   0.12886598   1.31578947   7.33333333   2.16428571           NA
## [176]           NA           NA           NA           NA  28.33333333
## [181]           NA           NA   0.05240000           NA           NA
## [186]   8.00000000           NA   9.36363636           NA           NA
## [191]   8.80952381           NA  31.66666667           NA           NA
## [196]  34.00000000   2.64705882   0.38461538   2.39616613  45.00000000
## [201]           NA  28.75000000   2.15517241           NA   2.91666667
## [206]           NA           NA   1.45454545   1.04384134   0.75581395
## [211]           NA           NA   0.73453237

Plotting the relationship between day range length and time spent moving

# Scatterplot of day range length against time spent moving; the formula is
# resolved against `d`, so the axes are labelled with the bare column names.
plot(DayLength_km ~ Move, data = d)

There does not appear to be much of a relationship between time spent moving and day range length; however, most species appear to spend less than 40 mins per day moving.

Plotting the relationship between day range length and log(time spent moving)

# Scatterplot of day range length against log-transformed time spent moving.
plot(DayLength_km ~ log(Move), data = d)

There still does not appear to be much of a relationship.

Plotting the relationship between log(day range length) and time spent moving

# Scatterplot of log-transformed day range length against time spent moving.
plot(log(DayLength_km) ~ Move, data = d)

Data seems skewed to the left

Plotting the relationship between log(day range length) and log(time spent moving)

# Scatterplot with both day range length and time spent moving log-transformed.
plot(log(DayLength_km) ~ log(Move), data = d)

Log-transforming both variables shows that overall, species that do spend more time moving travel farther than those who don’t

Plotting the relationship between log(day range length) and log(time spent moving), grouped by family

# log(day range length) vs log(time spent moving), points coloured by family.
# Family is referenced as a bare column so the colour mapping is evaluated
# against the plot data (and stays aligned if the plot is later faceted),
# rather than against the external vector d$Family.
p <- ggplot(
  data = d,
  aes(x = log(Move), y = log(DayLength_km), color = factor(Family))
) +
  xlab("log(Move)") +
  ylab("log(DayLength_km)") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p

adding marginal univariate plots

# Add marginal density/histogram plots to `p`. The function is called through
# its namespace so the package does not have to be attached and detached.
ggExtra::ggMarginal(p, type = "densigram")

setting up a grid for faceting by a grouping variable

# One panel per family, laid out in four columns; the legend is hidden because
# each panel is already labelled with its family.
p <- p +
  facet_wrap(vars(Family), ncol = 4) +
  theme(legend.position = "none")
print(p)

Plotting the relationship between day range length and mean group size

# Scatterplot of day range length against mean group size; the formula is
# resolved against `d`, so the axes are labelled with the bare column names.
plot(DayLength_km ~ MeanGroupSize, data = d)

Plotting the relationship between log(day range length) and mean group size

# Scatterplot of log-transformed day range length against mean group size.
plot(log(DayLength_km) ~ MeanGroupSize, data = d)

Plotting the relationship between day range length and log(mean group size)

# Scatterplot of untransformed day range length against log-transformed mean
# group size (previously this repeated the log(day range) vs group size plot).
plot(DayLength_km ~ log(MeanGroupSize), data = d)

Plotting the relationship between log(day range length) and log(mean group size)

# Scatterplot with both day range length and mean group size log-transformed.
plot(log(DayLength_km) ~ log(MeanGroupSize), data = d)

Plotting the relationship between day range length and mean group size, grouped by family

# Day range length vs mean group size (both untransformed), coloured by family.
# The axis labels name the untransformed variables that are actually plotted,
# and Family is referenced as a bare column so the colour mapping is evaluated
# against the plot data rather than the external vector d$Family.
p <- ggplot(
  data = d,
  aes(x = MeanGroupSize, y = DayLength_km, color = factor(Family))
) +
  xlab("MeanGroupSize") +
  ylab("DayLength_km") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p

adding marginal univariate plots

# Add marginal density/histogram plots to `p`. The function is called through
# its namespace so the package does not have to be attached and detached.
ggExtra::ggMarginal(p, type = "densigram")

setting up a grid for faceting by a grouping variable

# Facet the group-size plot by family (four columns) and hide the legend.
p <- p +
  facet_wrap(vars(Family), ncol = 4) +
  theme(legend.position = "none")
print(p)

no apparent relationship between day range length and mean group size

Plotting the relationship between log(day range length) and log(mean group size), grouped by family

# log(day range length) vs log(mean group size), points coloured by family.
# Family is referenced as a bare column so the colour mapping is evaluated
# against the plot data rather than the external vector d$Family.
p <- ggplot(
  data = d,
  aes(x = log(MeanGroupSize), y = log(DayLength_km), color = factor(Family))
) +
  xlab("log(MeanGroupSize)") +
  ylab("log(DayLength_km)") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p

adding marginal univariate plots

# Add marginal density/histogram plots to `p`. The function is called through
# its namespace so the package does not have to be attached and detached.
ggExtra::ggMarginal(p, type = "densigram")

setting up a grid for faceting by a grouping variable

# Facet the log-log group-size plot by family (four columns), without a legend.
p <- p +
  facet_wrap(vars(Family), ncol = 4) +
  theme(legend.position = "none")
print(p)

Cebidae appears to have a positive relationship between day range length and mean group size when both variables are log-transformed.

Plotting the relationship between body size dimorphism and canine size dimorphism, both overall and by family

Plotting the relationship between body size dimorphism and canine size dimorphism, overall

# Scatterplot of canine dimorphism against body size dimorphism; the formula is
# resolved against `d`, so the axes are labelled with the bare column names.
plot(Canine_Dimorphism ~ bsd, data = d)

Plotting the relationship between log(body size dimorphism) and log(canine size dimorphism), overall

# Scatterplot with both canine dimorphism and body size dimorphism log-transformed.
plot(log(Canine_Dimorphism) ~ log(bsd), data = d)

Points more spread out, positive, nonlinear relationship between bsd and Canine dimorphism

Plotting the relationship between body size dimorphism and canine size dimorphism, grouped by family

# Canine dimorphism vs body size dimorphism, points coloured by family.
# Family is referenced as a bare column so the colour mapping is evaluated
# against the plot data rather than the external vector d$Family.
p <- ggplot(
  data = d,
  aes(x = bsd, y = Canine_Dimorphism, color = factor(Family))
) +
  xlab("bsd") +
  ylab("Canine_Dimorphism") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p

adding marginal univariate plots

# Add marginal density/histogram plots to `p`. The function is called through
# its namespace so the package does not have to be attached and detached.
ggExtra::ggMarginal(p, type = "densigram")

setting up a grid for faceting by a grouping variable

# Facet the dimorphism plot by family (four columns) and suppress the legend.
p <- p +
  facet_wrap(vars(Family), ncol = 4) +
  theme(legend.position = "none")
print(p)

Cercopithecidae exhibits somewhat of a linear relationship between bsd and Canine dimorphism, Cebidae does as well yet with fewer data points (limited certainty)

Plotting the relationship between log(body size dimorphism) and log(canine size dimorphism), grouped by family

# log(canine dimorphism) vs log(body size dimorphism), coloured by family.
# Family is referenced as a bare column so the colour mapping is evaluated
# against the plot data rather than the external vector d$Family.
p <- ggplot(
  data = d,
  aes(x = log(bsd), y = log(Canine_Dimorphism), color = factor(Family))
) +
  xlab("log(bsd)") +
  ylab("log(Canine_Dimorphism)") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p

adding marginal univariate plots

# Add marginal density/histogram plots to `p`. The function is called through
# its namespace so the package does not have to be attached and detached.
ggExtra::ggMarginal(p, type = "densigram")

setting up a grid for faceting by a grouping variable

# Facet the log-log dimorphism plot by family (four columns), without a legend.
p <- p +
  facet_wrap(vars(Family), ncol = 4) +
  theme(legend.position = "none")
print(p)

Cercopithecidae also exhibits somewhat of a linear relationship between log (bsd) and log (Canine dimorphism), and Cebidae does as well yet with fewer data points (limited certainty)

Diet strategy analysis

# Classify each species' diet strategy from its Fruit and Leaves values.
# Leaves is read from the CSV as a character column, so comparing it with 50
# directly would be a lexicographic string comparison (e.g. "9" >= "50" is
# TRUE). It is converted to numeric in a temporary column first; entries that
# are not numbers become NA (as.numeric() warns when that happens), and rows
# where no condition evaluates to TRUE get diet = NA.
d <- mutate(
  d,
  leaves_num = as.numeric(Leaves),
  diet = case_when(
    Fruit >= 50 ~ "frugivore",
    leaves_num >= 50 ~ "folivore",
    Fruit < 50 & leaves_num < 50 ~ "omnivore"
  ),
  leaves_num = NULL # drop the temporary helper column again
)

Boxplots of group size vs dietary strategies

# Boxplots of mean group size for each diet strategy; the formula is resolved
# against `d`, so the axes are labelled with the bare column names.
boxplot(MeanGroupSize ~ diet, data = d)

Frugivores, on average, live in smaller groups than folivores; however, there is much larger variability in the frugivore data than in the folivore data.

One line code using forward pipe operator

# Per-family mean brain size and mean male body mass (missing values ignored),
# sorted so the family with the largest mean brain size comes first.
s <- d %>%
  mutate(Binomial = paste(Genus, Species, sep = " ")) %>%
  select(Binomial, Family, Brain_Size_Species_Mean, Body_mass_male_mean) %>%
  group_by(Family) %>%
  summarise(
    avgbrainsize = mean(Brain_Size_Species_Mean, na.rm = TRUE),
    avgbodymass = mean(Body_mass_male_mean, na.rm = TRUE)
  ) %>%
  arrange(desc(avgbrainsize))
print(s)
## # A tibble: 14 × 3
##    Family          avgbrainsize avgbodymass
##    <chr>                  <dbl>       <dbl>
##  1 Hominidae             410.        98681.
##  2 Hylobatidae           101.         6926.
##  3 Cercopithecidae        85.4        9543.
##  4 Atelidae               80.6        7895.
##  5 Pitheciidae            56.3        1955.
##  6 Daubentonidae          44.8        2620 
##  7 Indriidae              27.3        3638.
##  8 Cebidae                23.9        1012.
##  9 Lemuridae              23.1        2077.
## 10 Lorisidae               8.67        512.
## 11 Lepilemuridae           7.27        792 
## 12 Galagidae               5.96        395.
## 13 Cheirogalidae           4.04        193.
## 14 Tarsiidae               3.26        131